***********************************************************************************
* Descriptive Statistics
* last modified 7/14/2025
*
*
***********************************************************************************

* Start from an empty dataset, disable -more- paging, and close any log
* left open by a previous run (capture: no error if none is open).
clear
set more off
capture log close

***************
* Directories *
***************

* Every path below is relative to this working directory.
cd "/Users/"

* Code and input-data folders
global dofile       "./Code/NEW-OEWS"
global matrix_data  "./Data/National Employment Matrix"
global oews_data    "./Data/OEWS/all_excel"
global onet_data    "./OccLink/ONET/ONET data/merged"
global cps_data     "./Data/CPS/IPUMS/00015"
global xwalk        "./Data/Crosswalk"

* Output and scratch folders (log files are written next to the results)
global results      "./Results"
global temp         "./Temp"
global logfile      "./Results"


log using "${logfile}/TableE1.log", replace


***********************************************************************
* Table E1 Descriptive Statistics for All Workers, Movers, and Stayers *
***********************************************************************

* -clear- is the documented option of -use- for discarding the data
* currently in memory before loading the file.
use "${temp}/cps_2000-2020_wide.dta", clear

* Flags for workers whose occupation code differs (mover) or is the same
* (stayer) between t0 and t1. Each flag is 1 or missing, never 0, and
* requires both occupation codes below 9800 and a non-missing
* outlook_gr_t1. missing() is used rather than "!= ." so that extended
* missing values (.a-.z) are excluded too, matching the mark line below.
gen mover  = 1 if occ_t0 != occ_t1 & occ_t0 < 9800 & occ_t1 < 9800 & !missing(outlook_gr_t1)
gen stayer = 1 if occ_t0 == occ_t1 & occ_t0 < 9800 & occ_t1 < 9800 & !missing(outlook_gr_t1)

* Stata ranks missing values above every number, so "x > 400" on its own
* also deletes observations with missing emp_pc_growth_t0. The two drops
* below leave exactly the same observations in memory as that single drop
* did, but the log now reports the missing and the outlier counts separately.
* NOTE(review): the original comment quoted "about 1.4% dropped" for the
* combined drop -- confirm how much of that is true outliers, and whether
* observations with missing growth should really be excluded here.
drop if missing(emp_pc_growth_t0)
drop if emp_pc_growth_t0 > 400 /* drop outliers */

* good = 1 for observations in the analysis sample (0 otherwise):
* occupation code below 9800 and no missing value in any listed variable.
mark good if occ_t0 < 9800 & !missing(emp_chng_rate_t0, proj_pc_growth_t0, outlook_gr_t0, male, age_gr, race, hispanic, edu_gr, year) /*civilian, employed population */

/* All workers */
sum  emp_pc_growth_t0 proj_pc_growth_t0 adj_annual_mean_t0 if good == 1
tab1 outlook_gr_t0 male age_gr race hispanic edu_gr year if good == 1

/* Stayers */
sum  emp_pc_growth_t0 proj_pc_growth_t0 adj_annual_mean_t0 if good == 1 & stayer == 1
tab1 outlook_gr_t0 male age_gr race hispanic edu_gr year if good == 1 & stayer == 1

/* Movers */
sum  emp_pc_growth_t0 proj_pc_growth_t0 adj_annual_mean_t0 if good == 1 & mover == 1
tab1 outlook_gr_t0 male age_gr race hispanic edu_gr year if good == 1 & mover == 1

log close
 

log using "${logfile}/TableE2.log", replace

***********************************************************************
* Table E2 Descriptive Statistics at the Occupation Level              *
***********************************************************************

* -clear- is the documented option of -use- for discarding the data
* currently in memory before loading the file.
use "${temp}/oews_matrix_ooh2000-2020_ipums_occ.dta", clear

* Stata ranks missing values above every number, so "x > 400" on its own
* also deletes rows with missing emp_pc_growth. The two drops below leave
* exactly the same rows in memory as that single drop did, but the log now
* reports the missing and the outlier counts separately.
* NOTE(review): the original comment quoted "about 9% dropped" for the
* combined drop -- check in the log how much of that is missing values
* rather than true outliers.
drop if missing(emp_pc_growth)
drop if emp_pc_growth > 400 /* drop outliers */

* Summary statistics reported separately for each value of outlook_gr.
bys outlook_gr: sum empsize projsize proj_pc_growth annual_mean emp_chng_rate emp_pc_growth

log close
